import pandas as pd
import numpy as np

# Raise the display limits so large arrays and wide frames are not elided:
# numpy summarises arrays only above 2000 elements, pandas shows up to 2000
# characters per cell and never truncates rows or columns.
np.set_printoptions(threshold=2000)
# set_option accepts several (pattern, value) pairs in one call; they are
# applied in the order given.
pd.set_option(
    'display.max_colwidth', 2000,
    'max_colwidth', 2000,
    'display.max_rows', None,
    'display.max_columns', None,
)


train=pd.read_csv("train.csv")
import pandas as pd  # NOTE: redundant re-import; pandas is already imported at the top of the file
# Shorten features whose fractional part is excessively long: id_11 is
# rendered as a string with exactly four decimal places.
formater="{0:.04f}".format
# Series.map is used instead of DataFrame.applymap, which is deprecated since
# pandas 2.1; the per-element result is the same.
# Caveat: missing values are formatted as well, so NaN becomes the string 'nan'.
train['id_11']=train['id_11'].map(formater)
# Persist the reformatted table next to the input, without the index column.
train.to_csv("train1.csv",index=False)






# A dict (rather than a list) records the kind of every feature: a list this
# long quickly becomes unreadable.

# Both data_class and feature_property are set by hand, because a decision
# tree cannot know by itself whether a feature is discrete or continuous.


data_class = 'isFraud'

# Insertion order matters: the entries are iterated in this order when the
# .names content is printed. Numbered feature families are generated from
# their ranges instead of being spelled out one by one.
# 'TransactionID' is deliberately left out because it is a pure noise feature.
feature_property = {
    'isFraud': 'category',
    'TransactionDT': 'continuous',
    'TransactionAmt': 'continuous',
    'ProductCD': 'category',
    # card1 .. card6
    **{'card{}'.format(i): 'category' for i in range(1, 7)},
    'addr1': 'category',
    'addr2': 'category',
    'dist1': 'continuous',
    'dist2': 'continuous',
    'P_emaildomain': 'category',
    'R_emaildomain': 'category',
    # C1 .. C14
    **{'C{}'.format(i): 'continuous' for i in range(1, 15)},
    # D1 .. D15
    **{'D{}'.format(i): 'continuous' for i in range(1, 16)},
    # M1 .. M9
    **{'M{}'.format(i): 'category' for i in range(1, 10)},
    # V1 .. V339
    **{'V{}'.format(i): 'continuous' for i in range(1, 340)},
    # id_01 .. id_38 (two-digit, zero-padded suffix)
    **{'id_{:02d}'.format(i): 'category' for i in range(1, 39)},
    'DeviceType': 'category',
    'DeviceInfo': 'category',
}
                  

# NOTE: do not add extra print statements to the function below; whatever it
# prints becomes part of the generated .names file.
def names_format_class(feature,propertys):
    """Print the class line of the .names file for the label column.

    The distinct values of ``train[feature]`` are listed in the order
    returned by ``value_counts`` (missing values included, since
    ``dropna=False``), separated by commas and terminated by a period.

    Args:
        feature: Name of the label column in the module-level ``train`` frame.
        propertys: Kind of the feature; not used by this function.
    """
    counts = train[feature].value_counts(normalize=False, dropna=False)
    labels = [str(label) for label in counts.index]
    # Comma-separated list closed by a period, as the .names format requires.
    print(','.join(labels) + '.')



def names_format_feature(feature,propertys):
    """Print the .names entry describing one feature.

    A continuous feature is printed as ``<feature>: continuous.``. A
    categorical feature is printed as ``<feature>: v1,v2,...,vn.`` where the
    values are the distinct entries of ``train[feature]`` in the order
    returned by ``value_counts``. Each entry is followed by blank lines.

    Missing values are left out of the value list. They are dropped per value
    (before joining) rather than by substring replacement on the joined line,
    so a category such as ``finan`` is no longer damaged because it happens
    to end in ``nan``.

    Args:
        feature: Column name in the module-level ``train`` frame.
        propertys: ``'continuous'`` or ``'category'``; any other kind prints
            nothing. (Named ``propertys`` to avoid confusion with the
            built-in ``property``.)
    """
    if propertys=='continuous':
        print(feature+': continuous.\n')
    elif propertys=='category':
        counts = train[feature].value_counts(normalize=False, dropna=False)
        # str(NaN) is 'nan', and NaN values that were already turned into the
        # string 'nan' by earlier formatting compare equal as well, so a
        # single comparison removes both forms of "missing".
        values = [text for text in map(str, counts.index) if text != 'nan']
        print(str(feature) + ": " + ','.join(values) + '.')
        print("\n")



# This script works on the IEEE-CIS-Fraud-Detection dataset.

if __name__ == '__main__':
    # Walk the features in declaration order; the label column gets the class
    # line, every other column gets a feature line.
    for feature, propertys in feature_property.items():
        emit = names_format_class if feature == data_class else names_format_feature
        emit(feature, propertys)


# The text printed above is the complete content of the .names file.
# A different dataset needs a different .names file.